Tidy Tuesday Series

2018 Week 33 - Malaria Data

lruolin
06-20-2021

Load Packages

Load Data from tidytuesdayR package

# Download the Tidy Tuesday release for 2018, week 33
tt_data <- tt_load(2018, week = 33)


# Show the readme that accompanies the downloaded release
readme(tt_data)

# Pull the three tables out of the downloaded object
incidence <- tt_data$malaria_inc
deaths <- tt_data$malaria_deaths
deaths_age <- tt_data$malaria_deaths_age 
# Read CSV files from the working directory as well.
# NOTE(review): presumably these are local copies of the same three tables -
# confirm. Only data_incidence and data_deaths are referenced by the code
# shown later; incidence, deaths, deaths_age and data_deaths_age are not.
data_deaths <- read_csv("malaria_deaths.csv")
data_deaths_age <- read_csv("malaria_deaths_age.csv")
data_incidence <- read_csv("malaria_incidence.csv")

Summarise prevalence by year, then show it on a map

# Prevalence (parasite rate, PR) survey points for Kenya, both species.
# Rows without a PR value are discarded straight away.
kenya_pr <- filter(
  getPR(ISO = "KEN", species = "BOTH"),
  !is.na(pr)
)

glimpse(kenya_pr)
Rows: 1,855
Columns: 28
$ dhs_id                    <chr> "", "", "", "", "", "", "", "", ""…
$ site_id                   <int> 13580, 8231, 22331, 16507, 4231, 1…
$ site_name                 <chr> "Kora Kora", "Ulutya Primary Schoo…
$ latitude                  <dbl> -0.6097, -0.9724, -3.8442, -1.3149…
$ longitude                 <dbl> 39.7807, 37.6902, 39.7527, 36.8112…
$ rural_urban               <chr> "UNKNOWN", "RURAL", "UNKNOWN", "UR…
$ country                   <chr> "Kenya", "Kenya", "Kenya", "Kenya"…
$ country_id                <chr> "KEN", "KEN", "KEN", "KEN", "KEN",…
$ continent_id              <chr> "Africa", "Africa", "Africa", "Afr…
$ month_start               <int> 5, 10, 5, 7, 11, 3, 7, 8, 5, 9, 5,…
$ year_start                <int> 1994, 2009, 2009, 2009, 2009, 1995…
$ month_end                 <int> 5, 10, 5, 7, 11, 3, 7, 8, 5, 9, 5,…
$ year_end                  <int> 1994, 2009, 2009, 2009, 2009, 1995…
$ lower_age                 <dbl> 0.0, 5.0, 0.6, 4.0, 5.0, 0.0, 0.0,…
$ upper_age                 <int> 6, 17, 8, 15, 17, 4, 9, 4, 14, 15,…
$ examined                  <int> 270, 109, 11, 93, 110, 168, 133, 2…
$ positive                  <dbl> 36, 0, 2, 2, 48, 107, 111, 1, 4, 4…
$ pr                        <dbl> 0.1333, 0.0000, 0.1818, 0.0215, 0.…
$ species                   <chr> "P. falciparum", "P. falciparum", …
$ method                    <chr> "Microscopy", "RDT", "Microscopy",…
$ rdt_type                  <chr> "", "Paracheck PF - Rapid test for…
$ pcr_type                  <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ malaria_metrics_available <chr> "true", "true", "true", "true", "t…
$ location_available        <chr> "true", "true", "true", "true", "t…
$ permissions_info          <chr> "", "", "", "", "", "", "", "", ""…
$ citation1                 <chr> " (1994). <i>Vitamin A deficiency …
$ citation2                 <chr> "", "Gitonga, CW, Karanja, PN, Kih…
$ citation3                 <chr> "", "", "", "", "", "", "", "", ""…
# Sanity check: tabulate missing vs non-missing PR values after filtering
table(is.na(kenya_pr[["pr"]]))

FALSE 
 1855 
# Yearly pooled prevalence rate for Kenya: total positives divided by total
# examined across all survey points starting in the same year, as a line.
kenya_pr %>%
  group_by(year_start) %>%
  dplyr::summarise(examined = sum(examined),
                   positive = sum(positive)) %>%
  mutate(pr = positive / examined) %>%
  ggplot(aes(year_start, pr)) +
  geom_line() +
  labs(title = "Change in Prevalence Rate (Positive/Examined) over the years",
       subtitle = "Prevalence rate decreased over the years",
       x = "Year",
       y = "Prevalence Rate",
       caption = "Source: malariaAtlas package") +
  theme_few()

# Map of every Kenyan survey point, coloured by its prevalence rate.
kenya_pr %>%
  # sort ascending so points with a higher pr are plotted last
  arrange(pr) %>%
  ggplot(aes(longitude, latitude, col = pr)) +
  geom_point() +
  borders("world", regions = "Kenya") +
  # diverging scale centred on 50 %, shown as percentages
  scale_colour_gradient2(low = "blue", high = "red",
                         midpoint = 0.5,
                         labels = scales::percent_format()) +
  labs(title = "Prevalence of Malaria in Kenya",
       caption = "Source: malariaAtlas package",
       col = "Prevalence") +
  coord_map() +
  theme_void()

Aggregate Prevalence by decade

# Same survey-point map, split into one panel per decade of survey start.
kenya_pr %>%
  # floor the start year to its decade (e.g. 1994 -> 1990); only a new column
  # is needed here, so no grouping is applied
  mutate(decade = 10 * (year_start %/% 10)) %>%
  # sort ascending so points with a higher pr are plotted last
  arrange(pr) %>%
  ggplot(aes(longitude, latitude, col = pr)) +
  geom_point() +
  borders("world", regions = "Kenya") +
  scale_colour_gradient2(low = "blue", high = "red",
                         midpoint = 0.5,
                         labels = scales::percent_format()) +
  labs(title = "Prevalence of Malaria in Kenya, by decade",
       caption = "Source: malariaAtlas package",
       col = "Prevalence") +
  coord_map() +
  facet_wrap(~ decade) +
  theme_void()

Incidence

Looking at aggregated data

# Inspect the incidence table's columns and types before renaming them
glimpse(data_incidence)
Rows: 508
Columns: 4
$ Entity                                                                               <chr> …
$ Code                                                                                 <chr> …
$ Year                                                                                 <dbl> …
$ `Incidence of malaria (per 1,000 population at risk) (per 1,000 population at risk)` <dbl> …
# Give the incidence table short column names (assigned by position) and
# turn incidence from cases per 1,000 population at risk into a proportion.
malaria_inc_processed <- data_incidence
names(malaria_inc_processed) <- c("country", "code", "year", "incidence")
malaria_inc_processed <- mutate(
  malaria_inc_processed,
  incidence = incidence / 1000
)

# Incidence over time for six randomly chosen entities (the country column
# also holds aggregate regions, so a draw may include those).
# The seed is fixed so the same six entities appear on every run.
set.seed(2018)
malaria_inc_processed %>%
  filter(country %in% sample(unique(country), 6)) %>%
  ggplot(aes(year, incidence, col = country)) +
  geom_line() +
  scale_y_continuous(labels = scales::percent_format()) +
  theme_few()

Looking at 2015 levels and the change from 2000 to 2015
# One row per entity with a column per survey year (Y2000, Y2005, ...),
# plus the 2015 level and the change between 2000 and 2015.
malaria_spread <- pivot_wider(
  malaria_inc_processed,
  names_from = year,
  names_prefix = "Y",
  values_from = incidence
) %>%
  mutate(
    current = Y2015,
    change = current - Y2000
  )


# Scatter of the 2015 incidence level against its change since 2000, labelled
# with the country code. Turkey is left out as an outlier, and so is every
# row that has no country code.
malaria_spread %>%
  filter(!is.na(code) & country != "Turkey") %>%
  ggplot(aes(x = current, y = change)) +
  geom_point() +
  geom_text(aes(label = code), hjust = 1, vjust = 1) +
  theme_few()

# List the entities whose name has no matching region in the world map data
anti_join(
  malaria_spread,
  map_data("world"),
  by = c("country" = "region")
)
# A tibble: 32 x 8
   country          code   Y2000  Y2005  Y2010   Y2015 current  change
   <chr>            <chr>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>
 1 Congo            COG   0.364  0.350  0.217  0.173   0.173   -0.190 
 2 Cote d'Ivoire    CIV   0.525  0.531  0.446  0.349   0.349   -0.177 
 3 Democratic Repu… COD   0.508  0.525  0.427  0.246   0.246   -0.262 
 4 Early-demograph… <NA>  0.0837 0.0616 0.0475 0.0289  0.0289  -0.0548
 5 East Asia & Pac… <NA>  0.0227 0.0207 0.0201 0.00570 0.00570 -0.0170
 6 East Asia & Pac… <NA>  0.0228 0.0211 0.0205 0.00580 0.00580 -0.0170
 7 East Asia & Pac… <NA>  0.0228 0.0207 0.0202 0.00572 0.00572 -0.0171
 8 Fragile and con… <NA>  0.319  0.305  0.247  0.180   0.180   -0.139 
 9 Heavily indebte… <NA>  0.408  0.326  0.274  0.198   0.198   -0.209 
10 IBRD only        <NA>  0.0355 0.0383 0.0269 0.0152  0.0152  -0.0203
# … with 22 more rows
# Country-code lookup table from the maps package, printed as a tibble
as_tibble(maps::iso3166)
# A tibble: 269 x 5
   a2    a3    ISOname            mapname           sovereignty       
   <chr> <chr> <chr>              <chr>             <chr>             
 1 AW    ABW   Aruba              Aruba             Netherlands       
 2 AF    AFG   Afghanistan        Afghanistan       Afghanistan       
 3 AO    AGO   Angola             Angola            Angola            
 4 AI    AIA   Anguilla           Anguilla          Anguilla          
 5 AX    ALA   Aland Islands      Finland:Aland Is… Finland           
 6 AL    ALB   Albania            Albania           Albania           
 7 AD    AND   Andorra            Andorra           Andorra           
 8 AE    ARE   United Arab Emira… United Arab Emir… United Arab Emira…
 9 AR    ARG   Argentina          Argentina         Argentina         
10 AM    ARM   Armenia            Armenia           Armenia           
# … with 259 more rows
# World polygons without Antarctica
world <- filter(map_data("world"), region != "Antarctica")

# Attach map polygons to the incidence data: the three-letter code is mapped
# to a map name via maps::iso3166, and the map name is then matched to the
# polygon region. Rows with incidence of 1 or more are dropped first.
data_plot <- malaria_inc_processed %>%
  filter(incidence < 1) %>%
  inner_join(
    select(maps::iso3166, a3, mapname),
    by = c("code" = "a3")
  ) %>%
  inner_join(world, by = c("mapname" = "region"))

glimpse(data_plot)
Rows: 147,970
Columns: 10
$ country   <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgh…
$ code      <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "…
$ year      <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 20…
$ incidence <dbl> 0.1071, 0.1071, 0.1071, 0.1071, 0.1071, 0.1071, 0.…
$ mapname   <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afgh…
$ long      <dbl> 74.89131, 74.84023, 74.76738, 74.73896, 74.72666, …
$ lat       <dbl> 37.23164, 37.22505, 37.24917, 37.28564, 37.29072, …
$ group     <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ order     <int> 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24…
$ subregion <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
# World choropleth of malaria incidence, one panel per year in the data.
data_plot %>%
  ggplot(aes(x = long, y = lat, group = group, fill = incidence)) +
  geom_polygon() +
  facet_wrap(vars(year)) +
  coord_map() +
  # diverging fill centred on 20 %, legend shown as percentages
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    midpoint = 0.2,
    labels = scales::percent_format()
  ) +
  labs(
    title = "Malaria incidence over time around the world",
    subtitle = "Malaria incidence had generally decreased over time.",
    fill = "Incidence",
    caption = "Source: malariaAtlas package"
  ) +
  theme_void() +
  theme(
    title = element_text(face = "bold", size = 16),
    strip.text = element_text(face = "bold", size = 14)
  )

Deaths

# Inspect the deaths table's columns and types before renaming them
glimpse(data_deaths)
Rows: 6,156
Columns: 4
$ Entity                                                                             <chr> …
$ Code                                                                               <chr> …
$ Year                                                                               <dbl> …
$ `Deaths - Malaria - Sex: Both - Age: Age-standardized (Rate) (per 100,000 people)` <dbl> …
# Give the deaths table short column names (assigned by position)
malaria_deaths_processed <- data_deaths
names(malaria_deaths_processed) <- c("country", "code", "year", "deaths")

glimpse(malaria_deaths_processed)
Rows: 6,156
Columns: 4
$ country <chr> "Afghanistan", "Afghanistan", "Afghanistan", "Afghan…
$ code    <chr> "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AFG", "AF…
$ year    <dbl> 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998…
$ deaths  <dbl> 6.802930, 6.973494, 6.989882, 7.088983, 7.392472, 7.…

References

https://www.youtube.com/watch?v=5_6O2oDy5Jk&list=PL19ev-r1GBwkuyiwnxoHTRC8TTqP8OEi8&index=77

Citation

For attribution, please cite this work as

lruolin (2021, June 20). pRactice corner: Tidy Tuesday Series. Retrieved from https://lruolin.github.io/myBlog/posts/20210620_Tidytuesday malaria data/

BibTeX citation

@misc{lruolin2021tidy,
  author = {lruolin, },
  title = {pRactice corner: Tidy Tuesday Series},
  url = {https://lruolin.github.io/myBlog/posts/20210620_Tidytuesday malaria data/},
  year = {2021}
}